import pandas as pd
import numpy as np
# Load the HTRU2 pulsar dataset (the CSV has no header row) and keep a
# working copy with the class-label column (index 8) dropped, leaving
# only the eight continuous features for unsupervised learning.
HRTU_2 = pd.read_csv(
    "/content/drive/MyDrive/Datasets/HTRU_2.csv",
    header=None,
)
HRTU_2
df = HRTU_2.copy().drop(columns=8)
from sklearn import preprocessing

# Standardise every feature to zero mean / unit variance.
# preprocessing.scale returns a bare ndarray, so wrap it back into a
# DataFrame for convenient display.
scaled_values = preprocessing.scale(df)
rdf = pd.DataFrame(scaled_values)
rdf
from sklearn.decomposition import PCA

# Project the standardised features onto their first two principal
# components — a 2-D embedding used for plotting and clustering below.
pca = PCA(n_components=2)
ddf = pca.fit_transform(rdf)
ddf
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Elbow method: run K-Means for k = 2..9 on the 2-D PCA embedding and
# record the within-cluster sum of squares (inertia) for each k.
# n_init and random_state are set explicitly so the curve is
# reproducible and no FutureWarning is raised by newer scikit-learn
# releases (the n_init default changed in 1.4).
e = []
for k in np.arange(2, 10):
    clusters = KMeans(n_clusters=int(k), n_init=10, random_state=0)
    clusters.fit(ddf)
    e.append(clusters.inertia_)

ax = plt.figure(dpi=180).gca()
plt.plot(np.arange(2, 10), e)
plt.xlabel("Clusters")
plt.ylabel("Squared sum of Error")
plt.show()
# Final K-Means model at the elbow point (k = 3); colour the 2-D PCA
# projection by cluster assignment.  random_state pins down the
# otherwise non-deterministic centroid initialisation; n_init=10 keeps
# the historical default explicit (it changed in scikit-learn 1.4).
clusters = KMeans(n_clusters=3, n_init=10, random_state=0)
clusters.fit(ddf)
labels = clusters.labels_
ax = plt.figure(dpi=180).gca()
plt.scatter(ddf[:, 0], ddf[:, 1], c=labels, edgecolors='k')
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score

# Silhouette analysis for k = 2..9: higher score means better-separated
# clusters.  KMeans gets explicit n_init/random_state so the scores are
# reproducible run to run.
ss = []
for k in np.arange(2, 10):
    clusters = KMeans(n_clusters=int(k), n_init=10, random_state=0)
    clusters.fit(ddf)
    ss.append(silhouette_score(ddf, clusters.labels_))

ax = plt.figure(dpi=180).gca()
plt.plot(np.arange(2, 10), ss)
plt.xlabel("Clusters")
plt.ylabel("Silhouette score")
plt.show()
from sklearn import cluster, datasets, mixture

# Build four synthetic "toy" datasets: concentric circles, half-moons,
# Gaussian blobs, and uniform random noise.  Each entry of DS is a
# (samples, labels) pair; the noise set has no ground truth, hence the
# None placeholder.  random_state makes the generated datasets
# reproducible (the uniform set still varies unless np.random is seeded).
nsamples = 1500
circle = datasets.make_circles(nsamples, noise=0.01, random_state=0)
moon = datasets.make_moons(nsamples, noise=0.01, random_state=0)
blob = datasets.make_blobs(nsamples, random_state=0)
random = np.random.rand(nsamples, 2), None

DS = [circle, moon, blob, random]

from sklearn import preprocessing

# Standardise every dataset's feature matrix; the labels are discarded.
SDS = [preprocessing.scale(features) for features, _ in DS]
# One silhouette-vs-k curve per toy dataset, drawn side by side.
# FIX: labels are set through the Axes API — the original plt.xlabel /
# plt.ylabel calls inside the loop only labelled whichever axes was
# "current" (the last subplot).  enumerate replaces the manual counter.
f, axis = plt.subplots(1, 4, figsize=(40, 8))
for k, ds in enumerate(SDS):
    ss = []
    for n_clust in np.arange(2, 10):
        clusters = KMeans(n_clusters=int(n_clust), n_init=10, random_state=0)
        clusters.fit(ds)
        ss.append(silhouette_score(ds, clusters.labels_))
    axis[k].plot(np.arange(2, 10), ss)
    axis[k].set_xlabel("Clusters")
    axis[k].set_ylabel("Silhouette score")
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
# Hierarchical (agglomerative) clustering with average linkage on the
# 2-D PCA projection of HTRU2; colour each point by its cluster label.
hei_clust = AgglomerativeClustering(n_clusters=3, linkage='average')
new_labels = hei_clust.fit(ddf).labels_
plt.figure(figsize=(12, 9))
plt.scatter(ddf[:, 0], ddf[:, 1], c=new_labels, cmap='autumn_r', edgecolors='K', s=50)
Agglomerative clustering on the HTRU2 dataset
# Agglomerative clustering again, now with single linkage
# (nearest-neighbour merging), which tends to chain clusters together.
Agg = AgglomerativeClustering(n_clusters=3, linkage='single')
H = Agg.fit(ddf)
new_labels = H.labels_

plt.figure(figsize=(12, 9))
plt.scatter(
    ddf[:, 0], ddf[:, 1],
    c=new_labels, cmap='autumn_r', edgecolors='K', s=50,
)
# Same estimator with complete linkage (merge by farthest pair), which
# favours compact, similarly-sized clusters.
Agg = AgglomerativeClustering(n_clusters=3, linkage='complete')
H = Agg.fit(ddf)
new_labels = H.labels_

plt.figure(figsize=(12, 9))
plt.scatter(
    ddf[:, 0], ddf[:, 1],
    c=new_labels, cmap='autumn_r', edgecolors='K', s=50,
)
DBSCAN on the HTRU2 dataset
from sklearn.cluster import DBSCAN

# Density-based clustering on the PCA projection; eps is the
# neighbourhood radius, and low-density points are labelled -1 (noise).
db = DBSCAN(eps=0.8, metric="euclidean")
dbClust = db.fit(ddf)
new_labels = dbClust.labels_

plt.figure(figsize=(12, 9))
plt.scatter(
    ddf[:, 0], ddf[:, 1],
    c=new_labels, cmap='jet', edgecolors='K', s=50,
)
# K-Means partitions of each toy dataset for k = 2..7: one row of six
# scatter plots per dataset.  n_init/random_state pin down the otherwise
# non-deterministic initialisation so the figures are reproducible.
for ds in SDS:
    f, axis = plt.subplots(1, 6, figsize=(28, 4))
    for k in np.arange(2, 8):
        clusters = KMeans(n_clusters=int(k), n_init=10, random_state=0)
        clusters.fit(ds)
        labels = clusters.labels_
        axis[k - 2].scatter(ds[:, 0], ds[:, 1], c=labels, edgecolors='k')
    plt.show()
Agglomerative clustering on the toy datasets
# Agglomerative clustering of each toy dataset for k = 2..7.
# BUG FIX: the original passed n_clusters=3 on every iteration, so all
# six subplots per dataset were identical; the loop variable is now the
# cluster count, matching the K-Means and DBSCAN sweeps.
for ds in SDS:
    f, axis = plt.subplots(1, 6, figsize=(28, 4))
    for k in np.arange(2, 8):
        hei_clust = AgglomerativeClustering(n_clusters=int(k), linkage='average')
        hei_clust.fit(ds)
        new_labels = hei_clust.labels_
        axis[k - 2].scatter(ds[:, 0], ds[:, 1], c=new_labels, edgecolors='k')
    plt.show()
DBSCAN on the toy datasets
from sklearn.cluster import DBSCAN

# DBSCAN on each toy dataset with the neighbourhood radius eps swept
# over 0.1..0.6; points the estimator marks as noise carry label -1.
for ds in SDS:
    f, axis = plt.subplots(1, 6, figsize=(28, 4))
    for step in np.arange(2, 8):
        db = DBSCAN(eps=(step - 1) / 10.0, metric="euclidean")
        clusters = db.fit(ds)
        new_labels = clusters.labels_
        axis[step - 2].scatter(ds[:, 0], ds[:, 1], c=new_labels, edgecolors='k')
    plt.show()
import numpy as np
from sklearn import cluster, datasets, mixture
from itertools import cycle, islice
# Hierarchical clustering demo on a small noisy-circles dataset,
# visualised as a dendrogram.
np.random.seed(0)
no_of_samples = 100
noisy_circles, label1 = datasets.make_circles(
    n_samples=no_of_samples, factor=0.5, noise=.05)
d = preprocessing.scale(noisy_circles)

from scipy.spatial import distance_matrix
dist = distance_matrix(d, d)
dist

from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

# BUG FIX: hierarchy.linkage treats a 2-D array as raw observations, not
# as pairwise distances (SciPy warns about an "uncondensed distance
# matrix").  Convert the square matrix to condensed form first;
# checks=False tolerates the tiny floating-point asymmetry that
# distance_matrix can introduce.
Z = hierarchy.linkage(squareform(dist, checks=False), 'average')
dendro = hierarchy.dendrogram(Z)
!cp "/content/drive/MyDrive/Colab Notebooks/Practical 6.ipynb" ./
!jupyter nbconvert --to html "Practical 6"